Introduction

Smart Move

Make a smart decision about your next state

What is this app?

This is a simple Shiny application created as part of the Data Visualization course offered by St. Thomas University.

The app allows the user to explore demographics across the US. The data sources are all governmental sites available to the public; web-scraping methods were used to gather some of the data. Long-term development plans for this app include the ability to input City values.

How do I use it?

Navigate through the app using the tabs at the top. Further detail and instructions are provided on each tab. A high-level overview of what each tab does is presented below:

Health Care: Explores health care quality across the states. Weather: Explores the weather conditions and variations per state. Mortality: Explores the causes of death for each state. Crime: Explores crime reports provided by the FBI across the nation. Safety: Explores the distribution of female/male police officers and the ratio to population. Rent: Explores the historical rent prices across the states as well as their decomposition. Income: Explores compensation for several different industries at a state level.

Who made it?

This app was created by Damian Etchevest. Damian is a student in the Big Data Analytics program offered at St. Thomas University.

Note: for the purpose of running the graphs, the state input will be Florida and the Job input will be Chief Executives.

What are the sources?

HealthCare Data: https://nhqrnet.ahrq.gov Weather Data: https://www.usclimatedata.com Mortality: https://www.cdc.gov/ Crime: https://api.usa.gov/crime/fbi/sapi/api/summarized/estimates/states/ Police Data: https://api.usa.gov/crime/fbi/sapi/api/police-employment/states/ Income Data: https://www.bls.gov/oes/current/oes Rent Data: Zillow.com

Visualizations

# Visualizations ####
# Fixed inputs used to render every plot in this document: the state, the
# occupation title and the health-care area to drill into.
state <- "Florida"
Job <- "Chief Executives"
area <- "Access_to_Care"
# libraries ####
library(shiny)
## Warning: package 'shiny' was built under R version 3.6.3
library(rvest)
## Warning: package 'rvest' was built under R version 3.6.2
## Loading required package: xml2
## Warning: package 'xml2' was built under R version 3.6.2
library(tidyverse)
## Warning: package 'tidyverse' was built under R version 3.6.3
## -- Attaching packages ----------------------------- tidyverse 1.3.0 --
## v ggplot2 3.3.0     v purrr   0.3.3
## v tibble  2.1.3     v dplyr   0.8.5
## v tidyr   1.0.2     v stringr 1.4.0
## v readr   1.3.1     v forcats 0.5.0
## Warning: package 'ggplot2' was built under R version 3.6.3
## Warning: package 'tibble' was built under R version 3.6.2
## Warning: package 'tidyr' was built under R version 3.6.2
## Warning: package 'readr' was built under R version 3.6.2
## Warning: package 'purrr' was built under R version 3.6.2
## Warning: package 'dplyr' was built under R version 3.6.3
## Warning: package 'stringr' was built under R version 3.6.2
## Warning: package 'forcats' was built under R version 3.6.3
## -- Conflicts -------------------------------- tidyverse_conflicts() --
## x dplyr::filter()         masks stats::filter()
## x readr::guess_encoding() masks rvest::guess_encoding()
## x dplyr::lag()            masks stats::lag()
## x purrr::pluck()          masks rvest::pluck()
library(jsonlite)
## Warning: package 'jsonlite' was built under R version 3.6.2
## 
## Attaching package: 'jsonlite'
## The following object is masked from 'package:purrr':
## 
##     flatten
## The following object is masked from 'package:shiny':
## 
##     validate
library(datasets)
library(blsAPI)
library(RSelenium)
## Warning: package 'RSelenium' was built under R version 3.6.3
library(XML)
## Warning: package 'XML' was built under R version 3.6.2
## 
## Attaching package: 'XML'
## The following object is masked from 'package:rvest':
## 
##     xml
library(mongolite)
## Warning: package 'mongolite' was built under R version 3.6.3
library(forecast)
## Warning: package 'forecast' was built under R version 3.6.2
## Registered S3 method overwritten by 'quantmod':
##   method            from
##   as.zoo.data.frame zoo
library(TSA)
## Warning: package 'TSA' was built under R version 3.6.3
## Registered S3 methods overwritten by 'TSA':
##   method       from    
##   fitted.Arima forecast
##   plot.Arima   forecast
## 
## Attaching package: 'TSA'
## The following object is masked from 'package:readr':
## 
##     spec
## The following objects are masked from 'package:stats':
## 
##     acf, arima
## The following object is masked from 'package:utils':
## 
##     tar
library(gganimate)
## Warning: package 'gganimate' was built under R version 3.6.3
library(plotly)
## Warning: package 'plotly' was built under R version 3.6.3
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
# Loading Data ####

# MongoDB connection string for the "Visualization" database. It carries the
# account credentials, so it is read from the environment instead of being
# committed with the source. The password that used to be hard-coded on this
# line has been published and should be rotated.
url <- Sys.getenv("SMART_MOVE_MONGO_URL")
if (!nzchar(url)) {
  stop(
    "Set the SMART_MOVE_MONGO_URL environment variable to the MongoDB ",
    "connection string before running this script.",
    call. = FALSE
  )
}

# Local CSV extracts. Columns are dropped/kept by position.
Rent <- read.csv("data/rent.csv") %>% select(-1)

Weather <- read.csv("data/weather.csv") %>% select(-1)
# "X_row" becomes Month; the first three remaining columns are renamed by
# position to High, Low and Precipitation.
Weather <- rename(
  Weather,
  "Month" = "X_row",
  "High" = names(Weather[1]),
  "Low" = names(Weather[2]),
  "Precipitation" = names(Weather[3])
)

Mortality <- read.csv("data/mortality.csv") %>% select(-c(1, 2))

Crime <- read.csv("data/crime.csv") %>%
  select(3:14)

# NOTE(review): unlike the collections below, this keeps the collection handle
# itself; no $find() is issued here.
# Rendered output: mongo() warned that the URI options "authSource" and
# "replicaSet" cannot be overridden from the TXT record, and that appname
# cannot be set more than once.
PublicSafety <- mongo("Public Safety", "Visualization", url)

Income <- mongo("Income", "Visualization", url)$find()
# Wages are stored as text: drop the first character (presumably a currency
# symbol -- verify against the collection) and, for the annual figure, the
# thousands separators, then convert to numeric.
# Rendered output: both conversions warned "NAs introduced by coercion".
Income$Mean_hourly_wage <- as.numeric(
  substr(Income$Mean_hourly_wage, start = 2, 10)
)
Income$Annual_mean_wage <- as.numeric(
  substr(gsub(",", "", Income$Annual_mean_wage), 2, 10)
)
# Strip the literal word "Occupation" from every title; titles that ended in
# "Occupations" are left with a trailing " s".
Income$OcupationTitle <- gsub("Occupation", "", Income$OcupationTitle)

HealthCare <- mongo("HCRace", "Visualization", url)$find()

HC2 <- read.csv("data/mitu.csv")
# Health Care Plots ####

# Share of each quality rating per race/ethnicity group for the selected
# state. `!!state` forces the script-level `state` value so it cannot be
# confused with a column of the data.
data <- filter(HealthCare, State == !!state)

# labs() names aesthetics, not screen positions: with coord_flip() the x
# aesthetic (Race_Ethnicity) is the one drawn on the vertical axis.
p <- ggplot(data, mapping = aes(Race_Ethnicity, fill = quality)) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(x = "Race / Ethnicity", y = "Number of Quality Items")
ggplotly(p)

# Stack the "recent" and "baseline" measurements of HC2 into one long table
# with common column names (year, Rate, Performance). Columns are picked by
# position.
data <- filter(HC2, State == !!state)
dfHC1 <- select(data, c(2, 4, 5, 6, 7, 11)) %>%
  rename(
    "year" = "Recent_Year",
    "Rate" = "Recent_Rate",
    "Performance" = "Recent_Performance"
  )
dfHC2 <- select(data, c(2, 4, 8, 9, 10, 11)) %>%
  rename(
    "year" = "Baseline_Year",
    "Rate" = "Baseline_Rate",
    "Performance" = "Baseline_Performance"
  )
data <- rbind(dfHC1, dfHC2)
data$area <- as.character(data$area)
data$subareas <- as.character(data$subareas)

# Keep only the rows of the selected area (script-level `area`).
data <- data[data$area %in% area, ]

# Detail Performance on input area per state
p <- ggplot(data, mapping = aes(subareas, fill = Performance)) +
  geom_bar(position = "fill") +
  coord_flip() +
  labs(x = "Item", y = "Quality Distribution")
ggplotly(p)

# Animation of how it changed the quality of the healthcare per state per area
ggplot(data, mapping = aes(reorder(subareas, Rate), Rate, fill = subareas)) +
  geom_boxplot() +
  coord_flip() +
  # "none" is the supported way to drop a legend; a logical value is deprecated
  guides(fill = "none") +
  # here comes the gganimate code
  transition_states(
    year,
    transition_length = 2,
    state_length = 1
  ) +
  enter_fade() +
  exit_shrink() +
  ease_aes("sine-in-out") +
  # x is the sub area (drawn vertically after coord_flip), y is the rate
  labs(x = "Sub Area", y = "Rate")

# Weather Plots ####

# Polygon outlines of the US states; also reused by the income map below.
states <- map_data("state")

# Average temperature per state. Weather holds several rows per state (one
# per month), so it is reduced to a single value per state before joining;
# otherwise every polygon vertex is duplicated once per month and the fill of
# a state comes from an arbitrary month.
data <- Weather
data$avgTemp <- (data$High + data$Low) / 2
data$region <- str_to_lower(data$State)
data <- data %>%
  group_by(region) %>%
  summarise(avgTemp = mean(avgTemp, na.rm = TRUE))
data <- merge(states, data, sort = FALSE, by = "region")
# merge() does not keep the vertex sequence; restore it so that the outlines
# are drawn correctly.
data <- data[order(data$order), ]

ggplot(data, aes(long, lat)) +
  geom_polygon(aes(group = group, fill = avgTemp)) +
  coord_fixed(1.3)

# Monthly high (red) and low (blue) temperatures for the selected state.
data <- subset(Weather, State == state)
data$Month <- substr(data$Month, 1, 3)
# Keep the months in the order they appear in the data (assumed to be
# chronological -- verify against data/weather.csv) instead of the
# alphabetical order a character axis would use.
data$Month <- factor(data$Month, levels = unique(data$Month))
data$months <- seq_len(nrow(data))

p <- ggplot(data) +
  geom_point(mapping = aes(Month, High), color = "red") +
  geom_point(mapping = aes(Month, Low), color = "blue")
ggplotly(p)
# Crime ####

# Exact lookup of the state's index in the built-in `state.name` vector.
# A pattern match would also hit e.g. "West Virginia" when asked for
# "Virginia", giving two abbreviations and a silently recycled comparison.
state2 <- match(state, state.name)
if (is.na(state2)) {
  stop("Unknown state: ", state, call. = FALSE)
}

data <- Crime
data <- filter(data, state_abbr == state.abb[state2]) %>% distinct()

# Reshape to long format: one row per year and crime type. Column 2 (year)
# and column 3 (population) are repeated for every crime column from the 4th
# onwards; the crime column's name goes to `Crime`, its values to `Value`.
crime_cols <- names(data)[seq_along(data) > 3]
dfCrime <- do.call(rbind, lapply(crime_cols, function(crime) {
  data.frame(
    data[2],
    data[3],
    Value = data[[crime]],
    Crime = rep(crime, nrow(data))
  )
}))

# animated plot that shows the growth of crime over the years as population increases
ggplot(dfCrime, mapping = aes(population, Value, colour = Crime)) +
  geom_point(alpha = .7) +
  scale_x_log10(labels = scales::comma) + # disperse bunched values
  scale_y_log10(labels = scales::comma) +
  # here comes the gganimate code
  labs(
    title = "Year: {frame_time}", # information that is changing
    x = "Population", y = "Number of Cases"
  ) +
  transition_time(year) + # changes each state by year change
  ease_aes("linear") # the aesthetics are changed linearly
# Rendered output: "Removed 1 rows containing missing values (geom_point)."
# was reported repeatedly while rendering the frames.

# Number of cases per crime type in 2018. With coord_flip() the x aesthetic
# (the crime type) is drawn on the vertical axis.
ggplot(
  filter(dfCrime, year == 2018),
  mapping = aes(reorder(Crime, Value), Value, fill = Crime)
) +
  geom_col(position = "dodge2", na.rm = TRUE) +
  scale_y_log10(labels = scales::comma) +
  guides(fill = "none") +
  coord_flip() +
  labs(x = "Crime", y = "Number of Cases")

# Income ####

# Annual mean wage of the selected job, joined onto the state polygons
# (`states`, created in the weather section).
data <- Income
data <- filter(data, OcupationTitle == Job)
data$region <- str_to_lower(data$State)
data <- merge(states, data, sort = FALSE, by = "region")
# merge() does not keep the vertex sequence; restore it so that the outlines
# are drawn correctly.
data <- data[order(data$order), ]

# Mapping Annual Mean Wage for Specific Job
ggplot(data, aes(long, lat)) +
  geom_polygon(aes(group = group, fill = Annual_mean_wage)) +
  coord_fixed(1.3)

# Mean hourly wage per major occupation group in the selected state.
data <- filter(Income, State == state)
# Removing "Occupation" from titles ending in "Occupations" leaves a trailing
# " s"; drop only that remnant rather than every " s" inside a title.
data$OcupationTitle <- sub(" s\\s*$", "", data$OcupationTitle)
# With coord_flip() the x aesthetic (the occupation title) is drawn on the
# vertical axis, so it carries the title label and y carries the wage label.
ggplot(
  subset(data, Level == "major"),
  mapping = aes(
    reorder(OcupationTitle, Mean_hourly_wage), Mean_hourly_wage,
    fill = OcupationTitle
  )
) +
  geom_col(position = "dodge2") +
  guides(fill = "none") +
  coord_flip() +
  labs(x = "Ocupation Major Title", y = "Hourly Mean Wage")

# states for that specific job
# for input subset(data,Level == "detail")
data <- filter(Income, OcupationTitle == Job)
ggplot(
  data,
  mapping = aes(Annual_mean_wage, reorder(State, Annual_mean_wage))
) +
  geom_point() +
  labs(x = "Annual Wage", y = "State")
# Rendered output: "Removed 1 rows containing missing values (geom_point)."